import pandas as pd
import numpy as np
import pymysql
import pymongo
import re
import jieba
import logging
import plotly
import plotly.plotly as py
from plotly.offline import init_notebook_mode
import plotly.graph_objs as go
plotly.offline.init_notebook_mode()

# Read every document of the `douban_sz_rent_group_v2` collection from the
# local MongoDB instance into a DataFrame.
mongo_client = pymongo.MongoClient("localhost", 27017)
db = mongo_client.db_huangsy
sz_rent_group = db.douban_sz_rent_group_v2
doc_cur = sz_rent_group.find({})
item_list = list(doc_cur)
df_rent = pd.DataFrame(item_list)

# Dump a random sample of 100 posts with a non-empty room_rent to Excel.
d1 = df_rent.loc[df_rent.room_rent != ""].sample(100)
d1.to_excel(r"D:\pandas data\test1.xlsx")

# Working frame without the `content` column, with normalised column types.
df_rent_nc = df_rent.drop(columns='content')
# Keep the first 10 characters of response_time as the date.
df_rent_nc['date'] = df_rent_nc['response_time'].apply(lambda x: x[0:10])
# Empty strings mean "no value" and become NaN.
df_rent_nc['room_rent'] = df_rent_nc['room_rent'].apply(lambda x: float(x) if x != "" else np.nan)
# `np.int` was only an alias of the builtin `int` and was removed in
# NumPy 1.24, so the builtin is used directly.
df_rent_nc['girl_only'] = df_rent_nc['girl_only'].astype(int)
df_rent_nc['man_only'] = df_rent_nc['man_only'].astype(int)
df_rent_nc['entire_room_num'] = df_rent_nc['entire_room_num'].apply(lambda x: int(x) if x != "" else np.nan)

# post_year is compared as a string; only 2014 and later is plotted.
df_rent_plot = df_rent_nc.loc[df_rent_nc.post_year >= '2014']
from pyecharts import Bar, Line, Overlap, Page, WordCloud
from pyecharts import online
# Enable pyecharts "online" mode before any chart is built.
# NOTE(review): presumably this makes pyecharts load its JS assets from a
# remote host instead of local files - confirm against the installed version.
online()
def graph_lines_rent(res_district_rent, width=960, height=400, **kwargs):
    """Build a pyecharts ``Line`` chart with one series per mapping key.

    :param res_district_rent: mapping of series name to a dict holding the
        ``'x'`` and ``'y'`` value lists of that series
    :param width: chart width passed to ``Line``
    :param height: chart height passed to ``Line``
    :param kwargs: optional ``title``, ``yaxis_name``, ``yaxis_formatter``,
        ``is_label_show`` and ``xaxis_rotate`` overrides
    :returns: the populated ``Line`` object
    """
    chart_title = kwargs.get("title", "深圳豆瓣租房团各区房租年走势")
    axis_name = kwargs.get("yaxis_name", "")
    axis_formatter = kwargs.get("yaxis_formatter", "")
    show_labels = kwargs.get("is_label_show", True)
    label_rotation = kwargs.get("xaxis_rotate", 0)
    palette = [
        "#ff0000", "#00ff00", "#00ffff",
        "#0000ff", "#ff00ff", "#4a86e8", "#7f6000",
    ]
    chart = Line(
        chart_title,
        title_text_size=20,
        title_pos="center",
        title_top="1%",
        width=width,
        height=height,
    )
    # Every series is added with the same axis and legend styling.
    for series_name, series in res_district_rent.items():
        chart.add(
            series_name,
            series['x'],
            series['y'],
            xaxis_rotate=label_rotation,
            yaxis_formatter=axis_formatter,
            yaxis_name_gap=50,
            yaxis_name=axis_name,
            yaxis_label_textcolor="blue",
            line_width=2,
            legend_top="8%",
            label_color=palette,
            is_smooth=True,
            is_label_show=show_labels,
        )
    return chart
def get_block_res(df, block="district", time_col="post_year", y_col='room_rent'):
    """Split ``df`` into per-group x/y lists for plotting.

    The function used to be defined twice in a row; the first definition was
    dead code because the second one (which adds ``y_col``) replaced it. Only
    the general version is kept.

    :param df: DataFrame containing the ``block``, ``time_col`` and ``y_col``
        columns
    :param block: column whose distinct values define the groups
    :param time_col: column supplying the x values
    :param y_col: column supplying the y values
    :returns: dict mapping each distinct ``block`` value, in order of first
        appearance, to ``{'x': [...], 'y': [...]}``
    """
    res_dict = {}
    for blk in df[block].unique():
        df_blk = df.loc[df[block] == blk]
        res_dict[blk] = {
            'x': df_blk[time_col].tolist(),
            'y': df_blk[y_col].tolist(),
        }
    return res_dict
def filter_mean(x, trim=3):
    """Return the integer-truncated mean of ``x`` after trimming extremes.

    NaN values are dropped first, then the ``trim`` smallest and ``trim``
    largest values are discarded before averaging.

    :param x: array-like of numbers (pandas Series, numpy array, list, ...)
    :param trim: how many values to drop from each end; the default of 3
        reproduces the original hard-coded behaviour
    :returns: ``int`` mean of the remaining values, or ``np.nan`` when no more
        than ``2 * trim`` non-NaN values are available
    :raises ValueError: if ``trim`` is negative
    """
    if trim < 0:
        raise ValueError("trim must be non-negative")
    values = np.asarray(x, dtype=float)
    values = values[~np.isnan(values)]
    if len(values) <= 2 * trim:
        return np.nan
    values = np.sort(values)
    # An explicit end index keeps trim == 0 working (``[0:-0]`` would be empty).
    kept = values[trim:len(values) - trim]
    return int(np.mean(kept))
def filter_median(x, trim=3):
    """Return the integer-truncated median of ``x`` after trimming extremes.

    NaN values are dropped first, then the ``trim`` smallest and ``trim``
    largest values are discarded before taking the median.

    :param x: array-like of numbers (pandas Series, numpy array, list, ...)
    :param trim: how many values to drop from each end; the default of 3
        reproduces the original hard-coded behaviour
    :returns: ``int`` median of the remaining values, or ``np.nan`` when no
        more than ``2 * trim`` non-NaN values are available
    :raises ValueError: if ``trim`` is negative
    """
    if trim < 0:
        raise ValueError("trim must be non-negative")
    values = np.asarray(x, dtype=float)
    values = values[~np.isnan(values)]
    if len(values) <= 2 * trim:
        return np.nan
    values = np.sort(values)
    # An explicit end index keeps trim == 0 working (``[0:-0]`` would be empty).
    kept = values[trim:len(values) - trim]
    return int(np.median(kept))
# Exclude four districts from every chart that follows.
df_rent_plot = df_rent_plot.loc[
    ~df_rent_plot.district.isin(['光明','坪山', '盐田', '大鹏'])
]

# Offer / sublet posts that name a district and state a rent.
district_rent = df_rent_plot.loc[
    (df_rent_plot.rent_type.isin(['招租','转租']))
    & (df_rent_plot.district != "")
    & (pd.notna(df_rent_plot.room_rent))
]

# Trimmed mean rent per (year, district), ordered by year.
district_rent_year = (
    district_rent[['post_year', 'district', 'room_rent']]
    .groupby(['post_year', 'district'])
    .agg({'room_rent': filter_mean})
    .reset_index()
    .sort_values(by='post_year', ascending=True)
)

district_list = ["南山","福田","宝安", "罗湖", "龙岗","龙华"]

# One percentage histogram per district for 2019 posts priced below 5000.
layout = dict(
    title="2019年(截止0615)豆瓣深圳租房团各区价格分布",
    xaxis=dict(title="平均单房房租"),
    yaxis=dict(side='left', ticksuffix="%", zeroline=False),
)
data = []
for i, district in enumerate(district_list):
    df = district_rent.loc[
        (district_rent.district == district) & (district_rent.post_year == '2019')
    ]
    df = df.loc[df.room_rent < 5000]
    histogram = go.Histogram(
        x=df.room_rent,
        nbinsx=25,
        name=district,
        histnorm='percent',
        showlegend=True,
    )
    data.append(histogram)
fig = dict(data=data, layout=layout)
plotly.offline.iplot(fig)

# Yearly rent trend, one line per district.
res_district_rent = get_block_res(district_rent_year, block='district', time_col='post_year')
line = graph_lines_rent(res_district_rent, title="豆瓣深圳租房团各区平均单房房租年走势")
line
# Monthly rent trend per district for 2018.
# Keep offer / sublet posts from 2018 that name a district and state a rent.
district_rent = df_rent_plot.loc[(df_rent_plot.rent_type.isin(['招租','转租']))& (df_rent_plot.district != "")& (pd.notna(df_rent_plot.room_rent)) & (df_rent_plot.post_year=='2018')]
# Aggregate rent per (month, district) with filter_mean, which drops the three
# lowest and three highest values and yields NaN for groups of six or fewer.
district_rent_ym = district_rent[['year_month', 'district', 'room_rent']].groupby(['year_month', 'district']).agg({'room_rent': filter_mean}).reset_index()
district_rent_ym = district_rent_ym.sort_values(by='year_month', ascending=True)
#district_rent_ym['room_rent'] = district_rent_ym['room_rent'].apply(lambda x: int(x))
res_district_rent = get_block_res(district_rent_ym, block='district', time_col='year_month')
line = graph_lines_rent(res_district_rent, **{'title': "2018年豆瓣深圳租房团各区房租月走势"})
# Bare expression: shows the chart when this runs as a notebook cell.
line
# Monthly number of room-seeking posts per district, excluding 2019-06.
district_rent = df_rent_plot.loc[(df_rent_plot.rent_type.isin(['求租']))& (df_rent_plot.district != "")&(df_rent_plot.year_month!='2019-06')]
# NOTE(review): count() counts non-null room_rent values, so posts without a
# rent are not counted - confirm this is intended for a "number of posts" chart.
district_rent_ym = district_rent[['year_month', 'district', 'room_rent']].groupby(['year_month', 'district']).count().reset_index()
district_rent_ym = district_rent_ym.sort_values(by='year_month', ascending=True)
district_rent_ym['room_rent'] = district_rent_ym['room_rent'].apply(lambda x: int(x))
res_district_rent = get_block_res(district_rent_ym, block='district', time_col='year_month')
line = graph_lines_rent(res_district_rent, **{'title': "豆瓣深圳租房团各区年月求租贴子数",'is_label_show': False, 'xaxis_rotate':30})
# Bare expression: shows the chart when this runs as a notebook cell.
line
# Monthly number of offer / sublet posts per district, excluding 2019-06.
district_rent = df_rent_plot.loc[(df_rent_plot.rent_type.isin(['招租','转租']))& (df_rent_plot.district != "")&(df_rent_plot.year_month!='2019-06')]
# NOTE(review): count() counts non-null room_rent values, so posts without a
# rent are not counted - confirm this is intended for a "number of posts" chart.
district_rent_ym = district_rent[['year_month', 'district', 'room_rent']].groupby(['year_month', 'district']).count().reset_index()
district_rent_ym = district_rent_ym.sort_values(by='year_month', ascending=True)
district_rent_ym['room_rent'] = district_rent_ym['room_rent'].apply(lambda x: int(x))
res_district_rent = get_block_res(district_rent_ym, block='district', time_col='year_month')
line = graph_lines_rent(res_district_rent,**{'title': "豆瓣深圳租房团各区年月招转租贴子数",'is_label_show': False, 'xaxis_rotate':30})
# Bare expression: shows the chart when this runs as a notebook cell.
line
# 2019 posts that carry both a rent type and a district.
district_rent_rate = df_rent_plot.loc[
    (df_rent_plot.rent_type != "")
    & (df_rent_plot.district != "")
    & (df_rent_plot.post_year == '2019')
].copy()

# Non-null room_rent count per (district, rent_type), rent types in
# descending order.
district_rent_r2019 = (
    district_rent_rate[['district', 'rent_type', 'room_rent']]
    .groupby(['district', 'rent_type'])
    .count()
    .reset_index()
    .sort_values('rent_type', ascending=False)
)

district_list = ["南山","福田","宝安", "罗湖", "龙岗","龙华"]
data = []
anns = []
# One donut per district, laid out on a grid that is three columns wide.
for i, district in enumerate(district_list):
    grid_col = i % 3
    grid_row = int(i / 3)
    df = district_rent_r2019.loc[district_rent_r2019.district == district]
    pie = go.Pie(
        labels=df.rent_type.tolist(),
        values=df.room_rent.tolist(),
        name=district,
        hoverinfo='label+percent',
        textinfo='percent',
        textfont=dict(size=10),
        hole=0.4,
        domain=dict(
            x=[0.33 * grid_col, 0.33 * (grid_col + 1)],
            y=[0.4 * grid_row, 0.4 * (grid_row + 1) - 0.05],
        ),
    )
    data.append(pie)
    # District name as a plain text annotation next to its donut.
    label = dict(
        font=dict(size=20),
        text=district,
        showarrow=False,
        x=0.33 * grid_col,
        y=0.4 * (grid_row + 1) - 0.05,
    )
    anns.append(label)

layout = go.Layout(
    title='2019年豆瓣深圳租房团各区贴子数租寻占比',
    annotations=anns,
    height=600,
    width=960,
)
fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig)
# Ratio of offer / sublet posts to room-seeking posts per (month, district).
# district_rent: offer / sublet posts; district_look: room-seeking posts.
district_rent = df_rent_plot.loc[(df_rent_plot.rent_type.isin(['招租','转租']))& (df_rent_plot.district != "")].copy()
district_look = df_rent_plot.loc[(df_rent_plot.rent_type.isin(['求租']))& (df_rent_plot.district != "")]
# NOTE(review): this median is taken from district_rent (the offer posts) but
# is stored as 'look_rent'; district_look was probably intended - confirm.
district_look_ym = district_rent[['year_month', 'district', 'room_rent']].groupby(['year_month', 'district']).median().reset_index()
district_look_ym.rename(columns={'room_rent': 'look_rent'}, inplace=True)
district_rent = district_rent.merge(district_look_ym, how='left', on=['year_month','district'])
# is_match: the rent is within 20% of look_rent. The filters that would use it
# are commented out, so it does not affect the chart below.
district_rent['is_match'] = district_rent[['room_rent','look_rent']].apply(lambda x: np.abs(x['room_rent']- x['look_rent'])/x['look_rent'] < 0.2, axis=1)
# district_rent = district_rent.loc[district_rent.is_match==True]
# From here on look_rent is a count of room-seeking posts (non-null room_rent).
district_look_ym = district_look[['year_month', 'district', 'room_rent']].groupby(['year_month', 'district']).count().reset_index()
district_look_ym.rename(columns={'room_rent': 'look_rent'}, inplace=True)
# district_rent = district_rent.loc[district_rent.is_match==True]
# room_rent becomes a count of offer / sublet posts (non-null room_rent).
district_rent_ym = district_rent[['year_month', 'district', 'room_rent']].groupby(['year_month', 'district']).count().reset_index()
# district_rent_ym.head()
district_rent_ym = district_rent_ym.merge(district_look_ym, how='left', on=['year_month','district'])
district_rent_ym['rent_look_rate'] = (district_rent_ym['room_rent']/district_rent_ym['look_rent']).apply(lambda x: np.round(x,1))
# Division by a zero count yields +/-inf; treat those as missing.
district_rent_ym.loc[district_rent_ym.rent_look_rate.isin([np.inf,-np.inf]),'rent_look_rate'] = np.nan
# Cap the ratio at 40.
district_rent_ym.loc[district_rent_ym.rent_look_rate >= 40 ,'rent_look_rate'] = 40
res_district_rent = get_block_res(district_rent_ym, block='district', time_col='year_month', y_col="rent_look_rate")
line = graph_lines_rent(res_district_rent,**{'title': "豆瓣深圳租房团各区租寻比走势",'is_label_show': False, 'xaxis_rotate':30})
# Bare expression: shows the chart when this runs as a notebook cell.
line
# Offer / sublet posts that name a metro station.
district_rent = df_rent_plot.loc[
    (df_rent_plot.rent_type.isin(['招租','转租']))
    & (df_rent_plot.metro_station != "")
]

# Station x year table of trimmed mean rents.
metro_rent_year = pd.pivot_table(
    district_rent,
    index='metro_station',
    columns='post_year',
    values='room_rent',
    aggfunc=filter_mean,
)
metro_rent_year.index.name = ""
metro_rent_year.columns.name = ""

# Blank out year-over-year jumps: when two adjacent years differ by more than
# half of the row mean, the one lying further from that mean is set to NaN.
columns_list = list(metro_rent_year.columns)
for row_name, row_val in metro_rent_year.iterrows():
    row_mean = np.mean(row_val)
    for col, col_next in zip(columns_list[:-1], columns_list[1:]):
        delta = np.abs(row_val[col] - row_val[col_next]) / row_mean
        if np.isnan(delta) or not delta > 0.5:
            continue
        dist_current = np.abs(row_val[col] - row_mean)
        dist_next = np.abs(row_val[col_next] - row_mean)
        outlier_col = col if dist_current > dist_next else col_next
        metro_rent_year.loc[row_name, outlier_col] = np.nan

# Rank stations by their 2019 value and move the station name into a column.
metro_rent_year = metro_rent_year.sort_values(by="2019", ascending=False)
rank = list(range(1, metro_rent_year.shape[0] + 1))
metro_rent_year['order'] = rank
metro_rent_year['metro_station'] = metro_rent_year.index
metro_rent_year.index = list(rank)
# Put the two new columns first, followed by the year columns.
all_columns = list(metro_rent_year.columns)
metro_rent_year = metro_rent_year[all_columns[-2:] + all_columns[0:-2]]
# Station x year table of post counts, built from the same offer / sublet
# posts with a metro station as the rent table.
district_rent = df_rent_plot.loc[(df_rent_plot.rent_type.isin(['招租','转租']))& (df_rent_plot.metro_station != "")]
# aggfunc=len counts rows per cell.
metro_cnt_year = pd.pivot_table(district_rent, index='metro_station',columns='post_year', values='room_rent', aggfunc= len)
# Rank stations by their 2019 count, highest first.
metro_cnt_year = metro_cnt_year.sort_values(by="2019", ascending=False)
metro_cnt_year.index.name = ""
metro_cnt_year.columns.name = ""
# Add a 1-based rank column, move the station name from the index into a
# column, and replace the index with the same 1-based numbering.
metro_cnt_year['order']= list(range(1, metro_cnt_year.shape[0] + 1))
metro_cnt_year['metro_station']= metro_cnt_year.index
metro_cnt_year.index = list(range(1, metro_cnt_year.shape[0] + 1))
# Reorder so 'order' and 'metro_station' come first, then the year columns.
metro_cnt_year = metro_cnt_year[list(metro_cnt_year.columns[-2:])+list(metro_cnt_year.columns[0:-2])]
# metro_cnt_year.head()
# These lines repeat the plotly imports and the notebook-mode initialisation
# already performed at the top of the file.
import plotly
import plotly.plotly as py
from plotly.offline import init_notebook_mode
import plotly.graph_objs as go
plotly.offline.init_notebook_mode()
def table_chart(df, title, top_n=12):
    """Show ``df`` as a plotly table with a line chart of its first rows below.

    The second column of ``df`` supplies the name of each line, and the third
    column onwards supplies its values. The columns named ``metro_station``
    and ``order`` get special header texts.

    The table styling is sized from the actual number of columns, and the
    number of plotted rows is capped by the number of rows in ``df``. The
    original hard-coded 8 columns and always read 12 rows, which raised
    ``IndexError`` on shorter frames.

    :param df: DataFrame to display
    :param title: figure title, rendered in bold
    :param top_n: maximum number of leading rows drawn as lines (default 12,
        the original fixed value)
    :returns: None; the figure is displayed with ``plotly.offline.iplot``
    """
    n_cols = len(df.columns)
    n_value_cols = max(n_cols - 1, 0)

    header_values = []
    for col in df.columns:
        if col == "metro_station":
            n_col = ["<b>" + "地铁站" + "</b>"]
        elif col == "order":
            n_col = ["<b>" + "" + "</b>"]
        else:
            # str() keeps non-string column labels from raising on "+".
            n_col = ["<b>" + str(col) + "</b>"]
        header_values.append(n_col)

    # The table occupies the upper part of the figure.
    table_trace2 = go.Table(
        domain=dict(x=[0, 1],
                    y=[0.56, 1]),
        columnwidth=[1] + [3] * n_value_cols,
        columnorder=list(range(n_cols)),
        header=dict(height=40,
                    values=header_values,
                    line=dict(color='rgb(50, 50, 50)'),
                    align=['left'] * n_cols,
                    font=dict(color=['rgb(45, 45, 45)'] * n_cols, size=12),
                    fill=dict(color='#d562be')),
        cells=dict(values=[df[k].tolist() for k in df.columns],
                   line=dict(color='#506784'),
                   align=['left'] * n_cols,
                   font=dict(color=['rgb(40, 40, 40)'] * n_cols, size=[12] + [15] * n_value_cols),
                   suffix=[None] * n_cols,
                   height=27,
                   fill=dict(color=['rgb(255, 204, 255)', 'rgb(235, 193, 238)', 'rgba(228, 222, 249, 0.65)']))
    )

    x = list(df.columns[2:])
    trace_list = [table_trace2]
    color_list = ["#ff0000", "#ff9900", "#ffff00", "#00ff00", "#00ffff",
                  "#4a86e8", "#0000ff", "#9900ff", "#ff00ff", "#7f6000",
                  "#006600", "#0000cc", "#996633", "#660066"]
    # One line per leading row, drawn in the lower part of the figure.
    for i in range(min(top_n, len(df))):
        row_values = df.iloc[i].tolist()
        trace = go.Scatter(
            x=x,
            y=row_values[2:],
            xaxis='x1',
            yaxis='y1',
            mode='lines',
            # Colours repeat if more rows are requested than colours exist.
            line=dict(width=2, color=color_list[i % len(color_list)]),
            name=row_values[1]
        )
        trace_list.append(trace)

    axis = dict(
        showline=True,
        zeroline=False,
        showgrid=True,
        mirror=True,
        ticklen=4,
        gridcolor='#ffffff',
        tickfont=dict(size=10)
    )
    layout = dict(
        width=980,
        height=800,
        autosize=False,
        title='<b>%s</b>' % title,
        titlefont=dict(size=20, color="#000000"),
        margin=dict(t=50),
        showlegend=True,
        legend=dict(x=1.01, y=0.1),
        xaxis1=dict(axis, **dict(domain=[0, 1], anchor='y1', showticklabels=True)),
        yaxis1=dict(axis, **dict(domain=[0.0, 0.5], anchor='x1', hoverformat='d')),
        plot_bgcolor='rgba(228, 222, 249, 0.65)'
    )
    fig = dict(data=trace_list, layout=layout)
    plotly.offline.iplot(fig)
# Render the per-station rent table and the per-station post-count table,
# each with the trend lines of its leading rows.
table_chart(metro_rent_year,title="2014年到2019年豆瓣深圳租房团地铁站附近平均单房房租及年趋势")
table_chart(metro_cnt_year, title="2014年到2019年(截止0615)豆瓣深圳租房团各地铁站附近相关租房帖子数及年趋势")
# Station -> district lookup, taken from the first post seen for each station.
district_metro_map = df_rent_plot.drop_duplicates(['metro_station'])[['metro_station', 'district']]
# Relative change of the rent from 2018 to 2019, rounded to three decimals.
metro_rent_year['rate'] = ((metro_rent_year['2019'] - metro_rent_year['2018'])/metro_rent_year['2018']).apply(lambda x: np.round(x, 3))
metro_rent_2019 = metro_rent_year[['metro_station', '2019', 'rate']].copy()
metro_rent_2019 = metro_rent_2019.merge(district_metro_map, how="left", on=['metro_station'])
metro_cnt_2019 = metro_cnt_year[['metro_station', '2019']]
# Not used by the chart below; kept because it is a module-level name.
metro_rent_ns_2019 = metro_rent_2019.loc[metro_rent_2019.district == "南山"]

title_template = "2019年豆瓣深圳租房团{0}区地铁站周围平均单房房租及同比涨幅"
n_districts = len(district_list)

# Two traces per district (bar first, then line); only the first district is
# visible initially.
data = []
visible_list = [i == 0 for i in range(n_districts)]
for i, district in enumerate(district_list):
    df = metro_rent_2019.loc[metro_rent_2019.district == district]
    trace1 = go.Scatter(
        x=df['metro_station'],
        y=df['rate'],
        name='同比涨幅',
        yaxis='y2',
        mode='lines',
        visible=visible_list[i]
    )
    trace2 = go.Bar(
        x=df['metro_station'],
        y=df['2019'],
        name='2019年平均单房房租',
        marker=dict(color='blue', opacity=1),
        visible=visible_list[i]
    )
    data.append(trace2)
    data.append(trace1)

# One dropdown button per district. The buttons are generated in a loop so
# each title always names its own district; the hand-written version titled
# the sixth button with the fifth district.
buttons = []
for i, district in enumerate(district_list):
    visible_mask = [trace_idx // 2 == i for trace_idx in range(2 * n_districts)]
    buttons.append(
        dict(label=district,
             method='update',
             args=[{'visible': visible_mask},
                   {'title': title_template.format(district)}])
    )
update_menus = [
    dict(active=0,
         x=0.02,
         y=1.15,
         buttons=buttons)
]

layout = dict(
    # The initial title matches the initially visible (first) district.
    title=title_template.format(district_list[0]),
    yaxis=dict(side='left', showgrid=False, ticksuffix="元", zeroline=False),
    yaxis2=dict(side='right', tickformat="%", overlaying="y", showgrid=False, zeroline=False),
    legend=dict(x=1.05, y=0.8),
    updatemenus=update_menus)
fig = dict(data=data, layout=layout)
plotly.offline.iplot(fig)
# 2019 posts that name a metro station.
line_rent = df_rent_plot.loc[(df_rent_plot.metro_station != "") & (df_rent_plot.post_year == '2019')]
line_rent = line_rent[['line', 'room_rent']]
# A post may list several lines separated by "|"; expand to one row per line.
line_rent_series = (
    line_rent['line']
    .str.split(r'\|', expand=True)
    .stack()
    .reset_index(level=1, drop=True)
    .rename('line')
)
line_rent = line_rent.drop("line", axis=1).join(line_rent_series)
line_rent['line'] = line_rent['line'] + '号线'
# Name the counts column explicitly: pandas 2.x calls the value_counts()
# result "count", which made the old `line_rent_count.line` access fail.
line_rent_count = line_rent['line'].value_counts().rename('line').to_frame()
# line_rent_count.head(5)
trace = go.Bar(
    x=line_rent_count.index,
    y=line_rent_count['line'],
    name='帖子数',
    marker=dict(color='blue', opacity=1))
layout = dict(
    title="2019年豆瓣深圳租房团各地铁线附近租房帖子数分布",
)
fig = dict(data=[trace], layout=layout)
plotly.offline.iplot(fig)
import math
# Value of pi shared by the coordinate-conversion helpers defined below.
PI = math.pi
def _transformlat(coordinates):
lng = coordinates[ : , 0] - 105
lat = coordinates[ : , 1] - 35
ret = -100 + 2 * lng + 3 * lat + 0.2 * lat * lat + \
0.1 * lng * lat + 0.2 * np.sqrt(np.fabs(lng))
ret += (20 * np.sin(6 * lng * PI) + 20 *
np.sin(2 * lng * PI)) * 2 / 3
ret += (20 * np.sin(lat * PI) + 40 *
np.sin(lat / 3 * PI)) * 2 / 3
ret += (160 * np.sin(lat / 12 * PI) + 320 *
np.sin(lat * PI / 30.0)) * 2 / 3
return ret
def _transformlng(coordinates):
lng = coordinates[ : , 0] - 105
lat = coordinates[ : , 1] - 35
ret = 300 + lng + 2 * lat + 0.1 * lng * lng + \
0.1 * lng * lat + 0.1 * np.sqrt(np.fabs(lng))
ret += (20 * np.sin(6 * lng * PI) + 20 *
np.sin(2 * lng * PI)) * 2 / 3
ret += (20 * np.sin(lng * PI) + 40 *
np.sin(lng / 3 * PI)) * 2 / 3
ret += (150 * np.sin(lng / 12 * PI) + 300 *
np.sin(lng / 30 * PI)) * 2 / 3
return ret
def gcj02_to_wgs84(coordinates):
    """
    Convert GCJ-02 coordinates to WGS-84.

    Rows outside the bounding box 73.66 < lng < 135.05, 3.86 < lat < 53.55
    are returned unchanged. The result is a new array: the function used to
    write the converted values back into the caller's array, so converting
    the same array twice shifted it twice.

    :param coordinates: array-like of GCJ-02 (longitude, latitude) rows
    :returns: float numpy array of WGS-84 (longitude, latitude) rows
    """
    ee = 0.006693421622965943  # eccentricity squared
    a = 6378245  # semi-major axis
    # Work on a float copy so the input is never modified and integer input
    # is not truncated when the converted values are written back.
    coordinates = np.array(coordinates, dtype=float)
    lng = coordinates[:, 0]
    lat = coordinates[:, 1]
    is_in_china = (lng > 73.66) & (lng < 135.05) & (lat > 3.86) & (lat < 53.55)
    _transform = coordinates[is_in_china]  # only rows inside the box are shifted
    dlat = _transformlat(_transform)
    dlng = _transformlng(_transform)
    radlat = _transform[:, 1] / 180 * PI
    magic = np.sin(radlat)
    magic = 1 - ee * magic * magic
    sqrtmagic = np.sqrt(magic)
    dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * PI)
    dlng = (dlng * 180.0) / (a / sqrtmagic * np.cos(radlat) * PI)
    mglat = _transform[:, 1] + dlat
    mglng = _transform[:, 0] + dlng
    # result = 2 * input - (input + offset), i.e. input minus the offset.
    coordinates[is_in_china] = np.array([
        _transform[:, 0] * 2 - mglng, _transform[:, 1] * 2 - mglat
    ]).T
    return coordinates
def bd09_to_gcj02(coordinates):
    """
    Convert BD-09 coordinates to GCJ-02.

    :param coordinates: numpy array of BD-09 (longitude, latitude) rows
    :returns: new numpy array of GCJ-02 (longitude, latitude) rows
    """
    x_pi = PI * 3000 / 180
    # Remove the fixed offsets, then undo the small radial / angular
    # perturbation in polar form.
    dx = coordinates[:, 0] - 0.0065
    dy = coordinates[:, 1] - 0.006
    radius = np.sqrt(dx * dx + dy * dy) - 0.00002 * np.sin(dy * x_pi)
    angle = np.arctan2(dy, dx) - 0.000003 * np.cos(dx * x_pi)
    gcj_lng = radius * np.cos(angle)
    gcj_lat = radius * np.sin(angle)
    return np.array([gcj_lng, gcj_lat]).T
def bd09_to_wgs84(coordinates):
    """
    Convert BD-09 coordinates to WGS-84 by going through GCJ-02.

    :param coordinates: numpy array of BD-09 (longitude, latitude) rows
    :returns: numpy array of WGS-84 (longitude, latitude) rows
    """
    gcj02 = bd09_to_gcj02(coordinates)
    return gcj02_to_wgs84(gcj02)
def mercator_to_bd09(mercator):
    """
    Convert BD-09MC (Baidu Mercator) plane coordinates to BD-09.

    Each row picks one coefficient set from ``MC2LL`` according to the band of
    ``MCBAND`` that its absolute y value falls in, and is then evaluated by
    ``converter``. The signed x / y values are handed to ``converter`` so it
    can restore the sign of the result; the original passed absolute values,
    which made every output positive.

    :param mercator: array-like of BD-09MC (x, y) rows
    :returns: numpy array of BD-09 (longitude, latitude) rows
    """
    MCBAND = [12890594.86, 8362377.87, 5591021, 3481989.83, 1678043.12, 0]
    MC2LL = [[1.410526172116255e-08, 8.98305509648872e-06, -1.9939833816331,
              200.9824383106796, -187.2403703815547, 91.6087516669843,
              -23.38765649603339, 2.57121317296198, -0.03801003308653,
              17337981.2],
             [-7.435856389565537e-09, 8.983055097726239e-06, -0.78625201886289,
              96.32687599759846, -1.85204757529826, -59.36935905485877,
              47.40033549296737, -16.50741931063887, 2.28786674699375,
              10260144.86],
             [-3.030883460898826e-08, 8.98305509983578e-06, 0.30071316287616,
              59.74293618442277, 7.357984074871, -25.38371002664745,
              13.45380521110908, -3.29883767235584, 0.32710905363475,
              6856817.37],
             [-1.981981304930552e-08, 8.983055099779535e-06, 0.03278182852591,
              40.31678527705744, 0.65659298677277, -4.44255534477492,
              0.85341911805263, 0.12923347998204, -0.04625736007561,
              4482777.06],
             [3.09191371068437e-09, 8.983055096812155e-06, 6.995724062e-05,
              23.10934304144901, -0.00023663490511, -0.6321817810242,
              -0.00663494467273, 0.03430082397953, -0.00466043876332,
              2555164.4],
             [2.890871144776878e-09, 8.983055095805407e-06, -3.068298e-08,
              7.47137025468032, -3.53937994e-06, -0.02145144861037,
              -1.234426596e-05, 0.00010322952773, -3.23890364e-06,
              826088.5]]
    mercator = np.asarray(mercator, dtype=float)
    # Independent copies: converter() may overwrite the arrays it is given.
    x = mercator[:, 0].copy()
    y = mercator[:, 1].copy()
    # Band index = number of thresholds strictly greater than |y|.
    band = (np.abs(y).reshape((-1, 1)) < np.array(MCBAND)).sum(axis=1)
    # Fancy indexing keeps the (n, 10) shape even when there are no rows.
    coef = np.array(MC2LL)[band]
    return converter(x, y, coef)
def converter(x, y, coef):
    """Evaluate the per-row coordinate polynomials.

    For every row, the first output is ``coef[0] + coef[1] * |x|`` and the
    second is a degree-6 polynomial (coefficients ``coef[2:9]``) in
    ``|y| / coef[9]``. Each output takes the sign of its input, with zero
    treated as positive.

    The inputs are left untouched; the original overwrote ``x`` and ``y`` in
    place with -1 / 1 sign values.

    :param x: array-like of x values, one per row
    :param y: array-like of y values, one per row
    :param coef: numpy array of shape (rows, 10) holding the coefficients
    :returns: numpy array of shape (rows, 2)
    """
    x = np.asarray(x)
    y = np.asarray(y)
    x_temp = coef[:, 0] + coef[:, 1] * np.abs(x)
    x_n = np.abs(y) / coef[:, 9]
    y_temp = coef[:, 2] + coef[:, 3] * x_n + coef[:, 4] * x_n ** 2 + \
        coef[:, 5] * x_n ** 3 + coef[:, 6] * x_n ** 4 + coef[:, 7] * x_n ** 5 + \
        coef[:, 8] * x_n ** 6
    # Restore the sign of the inputs without modifying them.
    x_temp = x_temp * np.where(x < 0, -1, 1)
    y_temp = y_temp * np.where(y < 0, -1, 1)
    coordinates = np.array([x_temp, y_temp]).T
    return coordinates
import json
import requests

# Fetch the list of subway lines and their stops from the Baidu map endpoint.
url = "http://map.baidu.com/?qt=bsi&c=340"
headers = {
    "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
    "Accept-Encoding": "gzip, deflate, sdch",
}
s = requests.get(url, headers=headers)
# SECURITY: this used to be eval(s.content), which executes whatever the
# remote server returns. The body is parsed as JSON data instead.
s_content = json.loads(s.content)

line_name_list = []
line_uid_list = []
stops_list = []
xys_list = []
# Raw string so "\d" is a regex class rather than an invalid string escape.
line_name_pattern = re.compile(r'(\d+号线).*')
for line in s_content['content']:
    matched = line_name_pattern.match(line['line_name'])
    if matched is None:
        # Names without a leading "<number>号线" used to crash on .group().
        continue
    line_name = matched.group(1)
    # Each line may be listed more than once; keep the first occurrence.
    if line_name in line_name_list:
        continue
    line_name_list.append(line_name)
    line_uid_list.append(line['line_uid'])
    stop_list = []
    xy_list = []
    for stop in line['stops']:
        stop_list.append(stop['name'] + '站')
        xy_list.append([stop['x'], stop['y']])
    stops_list.append(stop_list)
    # Stop positions are converted from BD-09MC to WGS-84.
    xys_list.append(bd09_to_wgs84(mercator_to_bd09(np.array(xy_list))))
import json
class MyEncoder(json.JSONEncoder):
    """JSON encoder that serialises ``bytes`` values as UTF-8 text."""

    def default(self, obj):
        """
        Decode ``bytes`` objects to ``str``; defer everything else to the base class.

        :param obj: object the standard encoder could not serialise
        :return: the decoded string for ``bytes`` input
        """
        if not isinstance(obj, bytes):
            return super().default(obj)
        return str(obj, encoding='utf-8')
import time

# For every line uid, fetch the line colour and the polyline of its route.
line_color_list = []
trace_list = []
for line_uid in line_uid_list:
    url = "https://map.baidu.com/?qt=bsl&tps=&newmap=1&uid={0}&c=340".format(line_uid)
    s = requests.get(url, headers=headers)
    # json.loads() no longer accepts an `encoding` keyword (it raises
    # TypeError on Python 3.9+), and an encoder class was never a valid value
    # for it. Bytes input is decoded by json.loads itself.
    s_content = json.loads(s.content)
    line_detail = s_content['content'][0]
    line_color_list.append(line_detail['line_color'])
    # Third "|"-separated field of `geo`, minus its last character, is a flat
    # comma-separated list of numbers that is reshaped into two-column rows.
    flat_coords = line_detail['geo'].split('|')[2][:-1].split(',')
    trace_mercator = np.array(flat_coords, dtype=float).reshape((-1, 2))
    trace_list.append(bd09_to_wgs84(mercator_to_bd09(trace_mercator)))
    # Pause between requests.
    time.sleep(1)
# One row per metro line: name, stations, station coordinates, colour, route.
line_info = {
    'line_name': line_name_list,
    'metro_stations': stops_list,
    'mst_coords': xys_list,
    'line_color': line_color_list,
    'trace_coords': trace_list,
}
df_line_info = pd.DataFrame(line_info)

# Expand to one row per (line, station).
dfs = []
for i, row in df_line_info.iterrows():
    mst_list = row['metro_stations']
    mstc_list = row['mst_coords']
    n_stations = len(mstc_list)
    line_name = [row['line_name']] * n_stations
    line_color = [row['line_color']] * n_stations
    df = pd.DataFrame(dict(
        metro_station=mst_list,
        mst_coord=mstc_list.tolist(),
        line_name=line_name,
        line_color=line_color,
    ))
    dfs.append(df)
df_metro_info = pd.concat(dfs, axis=0)

# Attach the 2019 post count of each station; missing values become 0.
df_metro_info = df_metro_info.merge(metro_cnt_2019, how='left', on='metro_station')
df_metro_info.fillna(0, inplace=True)

# Marker size grows with the square root of the post count.
df_metro_info['size_r'] = df_metro_info['2019'].apply(
    lambda x: np.round(np.sqrt(x + 1) / 1.5 + 2, 2)
)
# Hover text: "<station>,<post count>".
df_metro_info['hoverinfo'] = (
    df_metro_info['metro_station']
    + ','
    + df_metro_info['2019'].apply(lambda x: str(int(x)))
)
# NOTE(review): the access token is committed in source - consider loading it
# from configuration instead.
token = "pk.eyJ1IjoiaHVhbmdzaGl5YW5nIiwiYSI6ImNqeTZwaWJ3MjBrcXMzbW9id2tobDRubHMifQ.zdUbKs85QiEUquPMdjleGA"

layout = go.Layout(
    title='2019年豆瓣深圳租房团租房贴子数地铁线路分布图',
    width=960,
    height=600,
    autosize=True,
    mapbox=dict(
        accesstoken=token,
        style='mapbox://styles/huangshiyang/cjy6xtpl40yxw1cohcdfdtaoq',
        bearing=0,
        center=dict(
            lat=22.5427758091,    # map centre latitude
            lon=114.0579807461,   # map centre longitude
        ),
        pitch=0,
        zoom=10,
    ),
)

data = []
for i, row in df_line_info.iterrows():
    line_name = row['line_name']
    # Station rows that belong to this line.
    line_rows = df_metro_info.loc[df_metro_info.line_name == line_name]
    metro_stations = line_rows.metro_station.tolist()
    station_coordinates = np.array(line_rows.mst_coord.tolist())
    size = line_rows.size_r.tolist()
    hoverinfo = line_rows.hoverinfo.tolist()
    trace_coordinates = row['trace_coords']
    metros = row['metro_stations']
    color = row['line_color']

    # Route polyline of the line.
    route_trace = go.Scattermapbox(
        lon=trace_coordinates[:, 0],   # route point longitudes
        lat=trace_coordinates[:, 1],   # route point latitudes
        mode='lines',
        hoverinfo='none',
        line=go.scattermapbox.Line(
            width=2,
            color=color,
        ),
        name=line_name,                # shown in the legend
        legendgroup=line_name,
    )
    data.append(route_trace)

    # Station markers sized by size_r, labelled with the station names.
    station_trace = go.Scattermapbox(
        lon=station_coordinates[:, 0],  # station longitudes
        lat=station_coordinates[:, 1],  # station latitudes
        mode='markers+text',
        text=metros,
        textfont=dict(size=8),
        hovertext=hoverinfo,
        hoverinfo='text',
        marker=go.scattermapbox.Marker(
            size=size,
            color=color,
        ),
        opacity=0.6,
        name=line_name,
        legendgroup=line_name,          # same legend group as the route trace
        showlegend=False,               # no separate legend entry
    )
    data.append(station_trace)

fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig)
# Share of offer / sublet posts flagged girl_only, per district and year.
df_rent_for = df_rent_plot.loc[df_rent_plot.rent_type.isin(['招租','转租'])& (df_rent_plot.district != "") & (df_rent_plot.post_year >= '2014')]
# The mean of the 0/1 girl_only flag is the fraction of flagged posts.
pivot_rent_for = pd.pivot_table(df_rent_for, values='girl_only', index='district', columns='post_year', aggfunc=np.mean )
# Format every cell as a percentage string with one decimal.
pivot_rent_for = pivot_rent_for.applymap(lambda x: str(np.round(x*100, 1))+'%')
pivot_rent_for['地区'] = pivot_rent_for.index
# pivot_rent_for.head()
# Fixed column order; requires every year from 2014 to 2019 to be present.
pivot_rent_for = pivot_rent_for[['地区','2014', '2015', '2016', '2017', '2018', '2019']]
# NOTE(review): this rebinds `headers`, which earlier held the HTTP request
# headers - re-running the fetch code after this point would send this list.
headers = list(pivot_rent_for.columns)
data = [go.Table(header=dict(values=headers, font=dict(size=10), align='left', fill = dict(color='#d562be')),
cells=dict(values=[pivot_rent_for[k].tolist() for k in pivot_rent_for.columns],align='left'))]
layout = dict(title="2014年到2019(截止0615)豆瓣深圳租房团限女生帖子数占比",height=400)
fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig)
# Offer / sublet posts that name a district and state a rent.
marked = (
    (df_rent_plot.rent_type.isin(['招租','转租']))
    & (df_rent_plot.district != "")
    & (pd.notna(df_rent_plot.room_rent))
)

# Trimmed mean rent per (year, district) for girl-only posts; the value
# column is renamed so it can sit next to the overall figure after the merge.
girl_only_posts = df_rent_plot.loc[marked & (df_rent_plot.girl_only == 1)]
district_rent_year_girl = (
    girl_only_posts[['post_year', 'district', 'room_rent']]
    .groupby(['post_year', 'district'])
    .agg({'room_rent': filter_mean})
    .reset_index()
    .sort_values(by='post_year', ascending=True)
    .rename(columns={'room_rent': 'room_rent2'})
)

# The same aggregate over all marked posts.
district_rent = df_rent_plot.loc[marked]
district_rent_year = (
    district_rent[['post_year', 'district', 'room_rent']]
    .groupby(['post_year', 'district'])
    .agg({'room_rent': filter_mean})
    .reset_index()
    .sort_values(by='post_year', ascending=True)
)
district_rent_year = district_rent_year.merge(
    district_rent_year_girl, how='left', on=['post_year', 'district']
)
district_rent_year = district_rent_year.sort_values(by="room_rent", ascending=False)

# Years, newest first; two bar traces per year, only the first year visible.
year_list = sorted(district_rent_year.post_year.unique())[::-1]
data = []
visible_list = [True, False, False, False, False, False]
for i, post_year in enumerate(year_list):
    df = district_rent_year.loc[district_rent_year.post_year == post_year]
    trace1 = go.Bar(
        x=df['district'],
        y=df['room_rent'],
        name='平均单房月租',
        marker=dict(color='blue', opacity=1),
        visible=visible_list[i],
    )
    trace2 = go.Bar(
        x=df['district'],
        y=df['room_rent2'],
        name='限女生帖子平均单房月租',
        yaxis='y',
        marker=dict(color='purple', opacity=1),
        visible=visible_list[i],
    )
    data.append(trace1)
    data.append(trace2)

# Six dropdown buttons, each showing the trace pair of one year.
update_menus = [
    dict(
        active=0,
        x=0.02,
        y=1.15,
        buttons=[
            dict(
                label=year_list[k],
                method='update',
                args=[
                    {'visible': [False] * (2 * k) + [True] * 2 + [False] * (10 - 2 * k)},
                    {'title': "{0}年豆瓣深圳租房团限女生帖与整体平均单房房租对比".format(year_list[k])},
                ],
            )
            for k in range(6)
        ],
    )
]
layout = dict(
    title="2019年豆瓣深圳租房团限女生帖与全部帖平均单房房租对比",
    yaxis=dict(side='left', ticksuffix="元", zeroline=False),
    legend=dict(x=1.05, y=0.8),
    updatemenus=update_menus,
)
fig = dict(data=data, layout=layout)
plotly.offline.iplot(fig)
from IPython.display import HTML
# %%html
# <script> $('div.input').hide()</script>
# <style>div.prompt{display:none} </style>
# Build an HTML snippet with a button that toggles the visibility of the
# `div.input` and `div.prompt` elements. The script calls code_toggle once
# when the document is ready, which hides them initially.
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
$('div.prompt').hide();
} else {
$('div.input').show();
$('div.prompt').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="toggle on/off code"></form>''')